source('00_util_scripts/mod_seurat.R')
source('00_util_scripts/mod_bplot.R')
library(data.table)

glyco.enz <- read_csv('mission/Fc_glyco/Ig.Fc.glyco.enyzme.csv')

list.enz <- glyco.enz |>
  summarise(data = list(gene), .by = glycosylation) |>
  pull(data, name = glycosylation)

data <- fread("CRC-I/data/GSE146771_CRC.Leukocyte.10x.TPM.txt.gz")

data[1:5, 1:5]

data <- column_to_rownames(data, 'V1')

data <- CreateSeuratObject(sprs.count, names.field = 3)

data$mito.ratio <- data |> PercentageFeatureSet('^MT-')

data |> VlnPlot('mito.ratio')

# load metadata
tenx_meta <- read_delim("CRC-I/data/GSE146771_CRC.Leukocyte.10x.Metadata.txt.gz")

tenx_meta$Sample |> unique()

snp_meta <- read_delim('CRC-I/data/zhang18.20.G2R.I2T.tsv')

tenx_meta |>
  left_join(snp_meta, join_by(Sample == id)) |>
  relocate(IGHG1.G396R) |>
  ggplot(aes(IGHG1.G396R, fill = Sample)) +
  geom_bar(position = 'fill')

tenx_meta |>
  filter(Sample == 'P1025')

ss2_meta <- read_delim("CRC-I/data/GSE146771_CRC.Leukocyte.Smart-seq2.Metadata.txt.gz")

ss2_meta |>
  left_join(snp_meta, join_by(Sample == id)) |>
  relocate(IGHG1.G396R) |>
  ggplot(aes(IGHG1.G396R, fill = Sample)) +
  geom_bar()

tenx_meta_df <- tenx_meta |>
  left_join(snp_meta, join_by(Sample == id)) |>
  relocate(IGHG1.G396R) |>
  column_to_rownames('CellName')

data <- Read10X_h5('CRC-I/data/zhang2020.crc.10x.scaled.count.h5')

sobj <- CreateSeuratObject(data, meta.data = tenx_meta_df,
                           names.field = 3)

# assign I232T genotype in tenx_meta
sobj$mitoRatio <- sobj |> PercentageFeatureSet("^MT-")

sobj |>
  VlnPlot('mitoRatio', group.by = 'IGHG1.G396R')

sobj@assays$RNA <- GetAssayData(sobj) |>
  CreateAssayObject(data = _)

sobj <- sobj |> NormalizeData()

sobj |>
  DotPlot(list.enz, group.by = 'Global_Cluster',
          cols = 'RdYlBu') +
  RotatedAxis()

sobj |>
  DotPlot2d('ST6GAL1', Global_Cluster, IGHG1.G396R) +
  labs(x = 'Cell type', y = 'IGHG1-G396R genotype',
       title = 'ST6GAL1 expression in CRC') +
  RotatedAxis()

# only B cell ------------
sobj.b <- sobj |>
  filter(Global_Cluster == 'B cell')

sobj.b <- sobj.b |>
  mutate(Sub_Cluster = str_remove(Sub_Cluster, '.+_'),
         type.fine = str_remove(Sub_Cluster, '-.+'))

sobj.b |>
  DotPlot(list.enz, group.by = 'type.fine',
          cols = 'RdYlBu') +
  RotatedAxis()

sobj.b |>
  DotPlot(list.enz, group.by = 'Tissue',
          cols = 'RdYlBu', scale = F) +
  RotatedAxis()

sobj.b <- sobj.b |>
  mutate(subgroup = str_c(Tissue, '_', type.fine))

meta.count.b <- sobj.b |>
  summarise(n = n(), .by = c(subgroup, IGHG1.G396R)) |>
  pivot_wider(id_cols = 1, names_from = IGHG1.G396R, values_from = n)

bad.subgroup <- meta.count.b |>
  filter(is.na(RR))

tissue.rrvgx <- sobj.b |>
  filter(Tissue != 'P') |>
  FindMarkersAcrossVar(split.by = 'Tissue', group.by = 'IGHG1.G396R',
                       ident.1 = 'RR')

tissue.rrvgx |>
  filter(gene %in% glyco.enz$gene) |>
  write_csv('mission/Fc_glyco/zhang20.crc.tissue.rrvgx.csv')

tissue.rrvgx |>
  filter(gene %in% glyco.enz$gene, p_val_adj < .05)

subgroup.rrvgx <- sobj.b |>
  filter(!(subgroup %in% bad.subgroup$subgroup)) |>
  FindMarkersAcrossVar(split.by = 'subgroup', group.by = 'IGHG1.G396R',
                       ident.1 = 'RR')

subgroup.rrvgx |>
  filter(gene %in% glyco.enz$gene, p_val < .05)

subgroup.rrvgx |>
  filter(gene %in% glyco.enz$gene) |>
  write_csv('mission/Fc_glyco/zhang20.crc.tissue.type.rrvgx.csv')

# plot ----------
subgroup.rrvgx <-
  read_csv('mission/Fc_glyco/zhang20.crc.tissue.type.rrvgx.csv')

subgroup.rrvgx <- subgroup.rrvgx |>
  inner_join(glyco.enz) |>
  filter(str_detect(cluster, 'N')) |>
  mutate(cluster = case_match(cluster,
                              'N_PlasmaB' ~ 'Plasma cell',
                              'N_GCBCell' ~ 'GC B cell',
                              'N_FollicularB' ~ 'Follicular B cell',
                              .default = 'GALT B cell')) 

subgroup.rrvgx |>
  ggplot(aes(gene, cluster, fill = avg_log2FC, size = -log10(p_val_adj))) +
  geom_point(shape = 21, color = 'black') +
  facet_wrap(vars(glycosylation), scales = 'free_x') +
  scale_fill_gradient2(low = 'blue', high = 'red') +
  theme_bw() +
  labs_pubr() +
  labs(title = 'Fc glycolysation enzyme expression in normal colon B cell\nRR vs GG+GR',
       subtitle = 'GSE146771', y = 'Cell type')

subgroup.rrvgx |>
  ggplot(aes(gene, cluster, color = avg_log2FC, size = -log10(p_val_adj))) +
  geom_point() +
  geom_text(aes(label = signif(avg_log2FC, 2)), color = 'black', size = 4) +
  facet_wrap(vars(glycosylation), scales = 'free_x') +
  scale_color_gradient2(low = 'blue', high = 'red') +
  theme_bw() +
  labs_pubr() +
  labs(title = 'Fc glycolysation enzyme expression in normal colon B cell\nRR vs GG+GR',
       subtitle = 'GSE146771', y = 'Cell type')
